library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.1 ─
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
library(p8105.datasets)
# Document-wide figure defaults for every knitr chunk.
knitr::opts_chunk$set(fig.width = 6, fig.asp = .6, out.width = "90%")

# Default ggplot2 theme: minimal, with the legend placed at the bottom.
theme_set(
  theme_minimal() +
    theme(legend.position = "bottom")
)

# Use the viridis palette for continuous colour and fill scales.
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

# Shadow the default discrete scale constructors with their viridis
# counterparts so later plots in this document pick them up.
scale_colour_discrete <- scale_colour_viridis_d
scale_fill_discrete <- scale_fill_viridis_d
# Prepare the listings data: rename `neighbourhood_group` to `borough` and add
# `stars`, defined as `review_scores_location` divided by 2.
nyc_airbnb <- nyc_airbnb %>%
  rename(borough = neighbourhood_group) %>%
  mutate(stars = review_scores_location / 2)
Let's try to count things
# Cross-tabulate the listings: one row per borough, one column per room type,
# each cell holding the number of listings in that combination.
nyc_airbnb %>%
  group_by(borough, room_type) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = room_type, values_from = n)
## # A tibble: 5 × 4
## borough `Entire home/apt` `Private room` `Shared room`
## <chr> <int> <int> <int>
## 1 Bronx 192 429 28
## 2 Brooklyn 7427 9000 383
## 3 Manhattan 10814 7812 586
## 4 Queens 1388 2241 192
## 5 Staten Island 116 144 1
#janitor::tabyl(borough,room_type)
# Scatter plot of every listing (`long` on x, `lat` on y), coloured by borough.
# Points are drawn very small because the listings overlap heavily.
nyc_airbnb %>%
  ggplot(aes(x = long, y = lat, colour = borough)) +
  geom_point(size = 0.1)
# Colour function mapping `stars` values onto the viridis palette; the domain
# is taken from the full `stars` column.
pal <- colorNumeric(
  palette = "viridis",
  domain = nyc_airbnb$stars
)

# Interactive map of listings coloured by `stars`, with an HTML popup showing
# price, stars and number of reviews.
nyc_airbnb %>%
  # Drop only the rows whose `stars` is missing. The previous `na.omit(stars)`
  # ignored its extra argument and removed every row with an NA in ANY column.
  drop_na(stars) %>%
  mutate(
    click_label = str_c(
      "<b>$", price, "</b><br>", stars, " stars<br>",
      number_of_reviews, " reviews"
    )
  ) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  # NOTE(review): addCircleMarkers() takes `lng` before `lat` positionally, so
  # the `lat` column is passed as longitude here. Left unchanged; confirm
  # against the actual contents of the `lat` / `long` columns.
  addCircleMarkers(
    ~lat, ~long,
    radius = .1,
    color = ~pal(stars),
    popup = ~click_label
  )
Let’s look at price and room type
# Price against stars, drawn as one panel per room type (panels side by side).
ggplot(nyc_airbnb, aes(x = stars, y = price)) +
  geom_point() +
  facet_grid(cols = vars(room_type))
## Warning: Removed 10037 rows containing missing values (geom_point).
Let’s look at price and neighborhood
# Mean listing price for each Manhattan neighbourhood (missing prices are
# ignored), sorted from the lowest mean price to the highest.
filter(nyc_airbnb, borough == "Manhattan") %>%
  group_by(neighbourhood) %>%
  summarise(mean_price = mean(price, na.rm = TRUE)) %>%
  arrange(mean_price)
## # A tibble: 32 × 2
## neighbourhood mean_price
## <chr> <dbl>
## 1 Marble Hill 83.6
## 2 Inwood 86.5
## 3 Washington Heights 90.1
## 4 Morningside Heights 107.
## 5 Harlem 117.
## 6 Roosevelt Island 122.
## 7 Two Bridges 123.
## 8 East Harlem 128.
## 9 Chinatown 158.
## 10 Upper East Side 172.
## # … with 22 more rows
# Box plots of price per Manhattan neighbourhood, restricted to listings priced
# at most 1000. Neighbourhoods are ordered by fct_reorder()'s default summary
# of price, the axes are flipped so the neighbourhood names read horizontally,
# and there is one panel per room type.
nyc_airbnb %>%
  filter(borough == "Manhattan", price <= 1000) %>%
  mutate(neighbourhood = fct_reorder(neighbourhood, price)) %>%
  ggplot(aes(x = neighbourhood, y = price)) +
  geom_boxplot() +
  coord_flip() +
  facet_grid(cols = vars(room_type))
获得地铁站的经纬度,结合现有数据,进行爱彼迎公寓距离地铁距离的分析
price vs location
# Random sample of 5000 listings priced under 500, plotted by coordinates and
# coloured by price.
# NOTE(review): this plot uses x = lat, y = long, while the borough scatter
# earlier in this document uses x = long, y = lat. Confirm which orientation
# is intended.
nyc_airbnb %>%
  filter(price < 500) %>%
  sample_n(5000) %>%
  ggplot(aes(x = lat, y = long, colour = price)) +
  geom_point(size = 0.1)
# Colour function on the viridis palette; with a NULL domain the range is
# taken from the values passed in at call time.
pal <- colorNumeric("viridis", NULL)

# Full-length label vector, kept only so that any later code referring to the
# global `label` still works. It is NOT used for the map below.
label <- paste0("<b>", nyc_airbnb$name, "</b><br>$", nyc_airbnb$price)

# Interactive map of a random sample of 1000 listings priced under 500,
# coloured by price, with a popup showing the listing name and price.
nyc_airbnb %>%
  filter(price < 500) %>%
  sample_n(1000) %>%
  # Build the popup text from the sampled rows themselves. The previous code
  # used the global `label`, which has one entry per row of the FULL data set,
  # so the popups did not correspond to the filtered and sampled markers.
  mutate(popup_label = paste0("<b>", name, "</b><br>$", price)) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  # NOTE(review): addCircleMarkers() takes `lng` before `lat` positionally, so
  # the `lat` column is passed as longitude here. Left unchanged; confirm
  # against the actual contents of the `lat` / `long` columns.
  addCircleMarkers(
    ~lat, ~long,
    radius = 1,
    color = ~pal(price),
    popup = ~popup_label
  )